Consider the cohort as a single group to start. Do not stratify by case-control status.
Stack the heatmaps for CD4. Are there obvious patterns to the antigens?
Stack the heatmaps for NOTCD4. Are there patterns? Are these patterns different from CD4?
Calculate FS/PFS for each stimulation in CD4 and compare side-by-side in boxplots and test by ANOVA. Is there a clear winner? Calculate FS/PFS for each stimulation in NOTCD4 and compare with boxplot and ANOVA. Is there a winner? Is this different than CD4?
It may also make sense to make a corrplot of the FS/PFS from the different stims. This is asking a different question though, i.e. whether a patient’s cytokine responses tend to be consistently elevated/not-elevated across multiple stims.
Regress FS/PFS against age (linear). Is there an effect of age on T cell functionality? Regress FS/PFS against days from symptom onset (linear). Is there a decrease in functionality with time? Stratify FS/PFS by sex and test by t-test (or Mann-Whitney, though sample size is large enough). Is there a difference?
Only then would you start asking the case-control questions.
library(here)
library(tidyverse)
library(COMPASS)
library(ggplot2)
library(data.table)
library(broom)
library(psych)
library(corrplot)
library(ggpubr)
library(knitr)
library(kableExtra)
library(cowplot)
library(purrr)
library(flowWorkspace)
library(GGally)
library(patchwork)
library(RColorBrewer)
library(ComplexHeatmap)
library(ggbeeswarm)
source(here::here("scripts/20200604_Helper_Functions.R"))
save_output <- FALSE
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
# Next 4 lines from Run_COMPASS.R
stims_for_compass_runs <- c("VEMP", "Spike 1", "Spike 2", "NCAP")
parent_nodes_for_compass_runs <- c("4+", "NOT4+", "8+")
stims_for_compass_runs_rep <- rep(stims_for_compass_runs, each = length(parent_nodes_for_compass_runs))
parent_nodes_for_compass_runs_rep <- rep(parent_nodes_for_compass_runs, times = length(stims_for_compass_runs))
crList <- purrr::pmap(.l = list(parent_nodes_for_compass_runs_rep,
stims_for_compass_runs_rep),
.f = function(parent, currentStim) {
currentStimForFile <- gsub(" ", "_", currentStim)
crPath <- here::here(sprintf("out/CompassOutput/%s/%s/COMPASSResult_%s_%s.rds", parent, currentStimForFile, parent, currentStimForFile))
readRDS(crPath)
}) %>%
set_names(gsub("+", "", gsub(" ", "_", paste0(parent_nodes_for_compass_runs_rep, "_", stims_for_compass_runs_rep)), fixed=TRUE))
names(crList)
## [1] "4_VEMP" "NOT4_VEMP" "8_VEMP" "4_Spike_1" "NOT4_Spike_1"
## [6] "8_Spike_1" "4_Spike_2" "NOT4_Spike_2" "8_Spike_2" "4_NCAP"
## [11] "NOT4_NCAP" "8_NCAP"
CD4RunNames <- c("4_Spike_1", "4_Spike_2", "4_NCAP", "4_VEMP")
NotCD4RunNames <- c("NOT4_Spike_1", "NOT4_Spike_2", "NOT4_NCAP", "NOT4_VEMP")
CD8RunNames <- c("8_Spike_1", "8_Spike_2", "8_NCAP", "8_VEMP")
plotCOMPASSResultStack(crList[CD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[NotCD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "Not-CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[CD8RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 and Not-CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, CD8RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 and CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames, CD8RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4, Not-CD4, and CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 7 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&!IL17a&IL2&IL4/5/13&TNFa
For this next part, drop the assay controls (healthies & TRIMAs)
# First change any NA Cohort levels to "Healthy"
crList2 <- lapply(crList,
function(cr) {
cr$data$meta$Cohort <- factor(ifelse(cr$data$meta$Cohort %in%
c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", cr$data$meta$Cohort),
levels = rev(c("Healthy", "Non-hospitalized", "Hospitalized")))
cr
})
names(crList2) <- names(crList)
crList.no_healthy <- lapply(names(crList2),
function(cr_name) {
cr <- crList2[[cr_name]]
print(sprintf("Accessing %s", cr_name))
cr$data$meta <- cr$data$meta %>%
# Filter out samples which weren't run through COMPASS
dplyr::filter(!(`SAMPLE ID` %in% setdiff(cr$data$meta$`SAMPLE ID`, rownames(cr$fit$mean_gamma))))
stopifnot(all.equal(rownames(cr$fit$mean_gamma), cr$data$meta$`SAMPLE ID`))
# And make sure that the count data only includes counts for the samples which were run through COMPASS
stopifnot(all.equal(rownames(cr$data$n_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(rownames(cr$data$n_u), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_u), rownames(cr$fit$mean_gamma)))
not_healthy_idx <- which(cr$data$meta$Cohort != "Healthy")
cr$data$meta <- cr$data$meta[not_healthy_idx,] %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")))
cr$fit$mean_gamma <- cr$fit$mean_gamma[not_healthy_idx,]
cr$data$n_s <- cr$data$n_s[not_healthy_idx,]
cr$data$n_u <- cr$data$n_u[not_healthy_idx,]
cr$data$counts_s <- cr$data$counts_s[not_healthy_idx]
cr$data$counts_u <- cr$data$counts_u[not_healthy_idx]
cr
})
## [1] "Accessing 4_VEMP"
## [1] "Accessing NOT4_VEMP"
## [1] "Accessing 8_VEMP"
## [1] "Accessing 4_Spike_1"
## [1] "Accessing NOT4_Spike_1"
## [1] "Accessing 8_Spike_1"
## [1] "Accessing 4_Spike_2"
## [1] "Accessing NOT4_Spike_2"
## [1] "Accessing 8_Spike_2"
## [1] "Accessing 4_NCAP"
## [1] "Accessing NOT4_NCAP"
## [1] "Accessing 8_NCAP"
names(crList.no_healthy) <- names(crList2)
# Read in the patient manifest with complete data (though it's also in the COMPASSResult object)
data_manifest <- read.csv(here::here("data/Seshadri_HAARVI_PBMC_manifest_merged_11June2020.csv"), check.names = F, stringsAsFactors = F)
# Remember 116C and 37C didn't get run through COMPASS
# 75C was not run at all
# 07B/551432 didn't get run for Spike 2
# For CD8 only: "BWT20", "15548" didn't get run for "Spike 2", "NCAP", and 15530 didn't get run for "Spike 2"
fs_pfs_df_with_manifest <- lapply(c(CD4RunNames, CD8RunNames), function(n) {
as.data.frame(COMPASS::FunctionalityScore(crList[[n]])) %>%
rownames_to_column("SAMPLE ID") %>%
rename(FS = `COMPASS::FunctionalityScore(crList[[n]])`) %>%
left_join(as.data.frame(COMPASS::PolyfunctionalityScore(crList[[n]])) %>%
rownames_to_column("SAMPLE ID") %>%
rename(PFS = `COMPASS::PolyfunctionalityScore(crList[[n]])`),
by = "SAMPLE ID") %>%
mutate(COMPASS_run = n)
} ) %>%
data.table::rbindlist() %>%
separate(COMPASS_run, into = c("parent", "STIM"), remove = F, extra = "merge") %>%
mutate(STIM = factor(recode(STIM, "Spike_1" = "S1", "Spike_2" = "S2", "NCAP" = "N", "VEMP" = "E"),
levels = c("S1", "S2", "N", "E"))) %>%
mutate("Record ID" = dplyr::recode(`SAMPLE ID`,
"HS09" = "HS9")) %>% # also, 75C did not get run
full_join(data_manifest, by = c("Record ID" = "Record ID")) %>%
dplyr::filter(!(`Record ID` %in% c("116C", "37C", "75C"))) %>%
mutate(Cohort = factor(ifelse(Cohort %in%
c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
levels = c("Hospitalized", "Non-hospitalized", "Healthy"))) %>%
# drop the healthies anyway
dplyr::filter(Cohort != "Healthy") %>%
mutate(Days_Symptom_Onset_to_Visit_1 = as.numeric(`Days symptom onset to visit 1`)) %>%
dplyr::select("SAMPLE ID", "FS", "PFS", "COMPASS_run", "parent", "STIM",
"Cohort", "Age", "Sex", "Race_v2", "Days_Symptom_Onset_to_Visit_1")
unique(fs_pfs_df_with_manifest$Cohort)
## [1] Hospitalized Non-hospitalized
## Levels: Hospitalized Non-hospitalized Healthy
fs_pfs_df_with_manifest %>% dplyr::filter(is.na(FS) | is.na(`SAMPLE ID`) | is.na(STIM) | is.na(Cohort))
## Empty data.table (0 rows and 11 cols): SAMPLE ID,FS,PFS,COMPASS_run,parent,STIM...
# Note removal of points for quade.test in order to have complete block design
# CD4: remove 551432/07B
# And for CD8 need to remove more individuals for complete block design:
# !(`SAMPLE ID` %in% c("BWT20", "15548", "15530"))
# Statistics is calculated after exclusion, but all points are plotted
plot_fs_vs_stim <- function(current_parent) {
StimColors <- c("S1" = "#f0027fac", "S2" = "#fdc086ac", "N" = "#beaed4ac", "E" = "#e6e63cac")
StimOutlineColors <- c("S1" = "#f0027fdf", "S2" = "#fdc086df", "N" = "#beaed4df", "E" = "#e6e63cdf")
current_data_for_test <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == current_parent) %>%
pivot_wider(id_cols = c("SAMPLE ID"), names_from = STIM, values_from = FS) %>%
# Remove any individuals which don't have data for all 4 stims, for the purposes of having complete data for the test
na.omit() %>%
# For the pairwise wilcox test, make the data long again
pivot_longer(cols = c(S1, S2, N, E), names_to = "STIM", values_to = "FS") %>%
# mutate(STIM = factor(STIM, levels = c("S1", "S2", "N", "E"))) %>%
# It should already be in order, but make sure the SAMPLE IDs are in the same order for each STIM
arrange(`SAMPLE ID`, STIM)
quade_test <- quade.test(FS ~ STIM | `SAMPLE ID`, data = current_data_for_test)
pwt <- pairwise.wilcox.test(current_data_for_test$FS, current_data_for_test$STIM, p.adjust.method = "bonferroni")
# For the plot, don't filter out individuals who don't have data for all 4 stims
# This may result in inconsistency between the test results and the plot, but it should be pretty similar
current_data_for_plot <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == current_parent) %>%
mutate(STIM = factor(STIM, levels = c("S1", "S2", "N", "E")))
current_plot <- ggplot(current_data_for_plot, aes(STIM, FS)) +
theme_bw(base_size = 22) +
geom_hline(yintercept = 0, linetype="dashed", alpha = 0.5) +
geom_violin(aes(fill = `STIM`), draw_quantiles = c(0.5),
# All violins have the same maximum width. Overrides default of equal area
scale="width", width = 0.6) +
geom_violin(fill="transparent", draw_quantiles = c(0.25, 0.75), linetype = "dashed",
scale="width", width = 0.6) +
geom_violin(aes(color=`STIM`), fill="transparent",
scale="width", width=0.6, size=1.1) +
geom_quasirandom(size=0.1, width=0.2, varwidth=T, method="quasirandom") +
theme(axis.title.x = element_blank(),
axis.title.y = element_text(size=27),
axis.text.y = element_text(color="black", size=20),
axis.text.x = element_text(color="black", size=26),
plot.title = element_text(hjust = 0.5, size=29),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(0.3, 0.2, 0.1, 0.2, "cm")) +
# scale_x_discrete(expand = c(0.3, 0.3)) +
labs(y = "Functionality Score",
title = sprintf("CD%s", current_parent)) +
scale_fill_manual(values = StimColors) +
scale_color_manual(values = StimOutlineColors)
list("plot" = current_plot,
"quade_test" = quade_test,
"wilcox_test" = pwt)
}
cd4_fs_vs_stim_out <- plot_fs_vs_stim("4")
cd4_fs_vs_stim_out$quade_test$p.value
## [1] 6.408814e-32
ggplot_build(cd4_fs_vs_stim_out$plot)$layout$panel_params[[1]]$y.range
## [1] -0.005442067 0.114283406
annotation_df <- broom::tidy(cd4_fs_vs_stim_out$wilcox_test) %>%
dplyr::filter(p.value < 0.05) %>%
mutate(y_pos = c(0.118, 0.145, 0.132, 0.114),
p.text = if_else(p.value < 0.001, "p<.001", paste0("p=", sub("0.", ".", round(p.value, 3)))))
cd4_fs_vs_stim_plot <- cd4_fs_vs_stim_out$plot +
coord_cartesian(ylim = c(NA, 0.154)) +
suppressWarnings(ggsignif::geom_signif(inherit.aes=F,data=annotation_df,
aes_string(xmin="group1", xmax="group2", annotations="p.text", y_position="y_pos"),
tip_length = c(0.005, 0.005),
textsize=5,
manual = TRUE))
cd4_fs_vs_stim_plot
cd8_fs_vs_stim_out <- plot_fs_vs_stim("8")
cd8_fs_vs_stim_out$quade_test$p.value
## [1] 5.442791e-08
ggplot_build(cd8_fs_vs_stim_out$plot)$layout$panel_params[[1]]$y.range
## [1] -0.002139902 0.044937933
annotation_df <- broom::tidy(cd8_fs_vs_stim_out$wilcox_test) %>%
dplyr::filter(p.value < 0.05) %>%
mutate(y_pos = c(0.061, 0.053, 0.045),
p.text = if_else(p.value < 0.001, "p<.001", paste0("p=", sub("0.", ".", round(p.value, 3)))))
cd8_fs_vs_stim_plot <- cd8_fs_vs_stim_out$plot +
coord_cartesian(ylim = c(NA, 0.065)) +
suppressWarnings(ggsignif::geom_signif(inherit.aes=F,data=annotation_df,
aes_string(xmin="group1", xmax="group2", annotations="p.text", y_position="y_pos"),
tip_length = c(0.005, 0.005),
textsize=5,
manual = TRUE))
cd8_fs_vs_stim_plot
# Note that the above data are paired, even though it's represented as violin plots.
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4A_CD4_FS_vs_Stim.pdf"), width=5, height=4,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
cd4_fs_vs_stim_plot
dev.off()
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5F_CD8_FS_vs_Stim.pdf"), width=5, height=4,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
cd8_fs_vs_stim_plot
dev.off()
}
As we saw in the heatmaps, NCAP and Spike 1 consistently have the highest FS/PFS scores, and VEMP has the lowest.
# StimColors <- c("S1" = "#f0027f", "S2" = "#fdc086", "N" = "#beaed4", "E" = "#e6e63c")
StimColors <- c("S1" = "#f0027fac", "S2" = "#fdc086ac", "N" = "#beaed4ac", "E" = "#e6e63cac")
fs_vs_x_plot <- function(current_parent, current_stim, xaxis="Age", include_regression_line = TRUE) {
current_data <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == !!current_parent & STIM == !!current_stim)
current_plot <- ggplot(current_data, aes(!!as.symbol(xaxis), FS)) +
theme_bw(base_size = 22) +
geom_point(fill=StimColors[[current_stim]], pch=21, size=2.5) +
theme(axis.title.x = element_text(size=20),
axis.title.y = element_text(size=20),
axis.text.y = element_text(color="black", size=17),
axis.text.x = element_text(color="black", size=17),
plot.title = element_text(hjust=0.5, size=21),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(0.3, 0.4, 0.1, 0.2, "cm")) +
labs(title = current_stim,
x = sub("([A-Za-z]*)_.*", "\\1", xaxis))
if(include_regression_line) {
current_plot <- current_plot +
# Linear regression line with 95% CI (based on "standard error of predicted means")
# By "predicted means", we're talking about the regression line's y-value at each value of x
# Pretty sure the grey 95% CI is the "confidence interval of the prediction", as discussed here: https://statisticsbyjim.com/glossary/confidence-interval-prediction/
# So it's not about individual points (a prediction interval), but rather the accuracy of the predicted mean y-value at each x
# Help understanding CI from ?predict.lm and here: https://stackoverflow.com/questions/45742987/how-is-level-used-to-generate-the-confidence-interval-in-geom-smooth
geom_smooth(color = "black", method="lm") +
# stat_cor from ggpubr
stat_cor(method = "pearson", size=5.5)
}
current_plot
}
parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_age_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot)
names(fs_vs_age_plot_list) <- paste0(stim_vec, "_", parents_vec)
# p-values NOT adjusted:
fs_vs_age_plot_list
## $S1_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_4
## `geom_smooth()` using formula 'y ~ x'
##
## $N_4
## `geom_smooth()` using formula 'y ~ x'
##
## $E_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S1_8
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_8
## `geom_smooth()` using formula 'y ~ x'
##
## $N_8
## `geom_smooth()` using formula 'y ~ x'
##
## $E_8
## `geom_smooth()` using formula 'y ~ x'
fs_vs_days_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot, xaxis="Days_Symptom_Onset_to_Visit_1")
names(fs_vs_days_plot_list) <- paste0(stim_vec, "_", parents_vec)
fs_vs_days_plot_list
## $S1_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_4
## `geom_smooth()` using formula 'y ~ x'
##
## $N_4
## `geom_smooth()` using formula 'y ~ x'
##
## $E_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S1_8
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_8
## `geom_smooth()` using formula 'y ~ x'
##
## $N_8
## `geom_smooth()` using formula 'y ~ x'
##
## $E_8
## `geom_smooth()` using formula 'y ~ x'
StimSexColors <- list("S1" = c("F" = "#bd0264ac", "M" = "#fd2898ac"),
"S2" = c("F" = "#fca654ac", "M" = "#fedab8ac"),
"N" = c("F" = "#a38dc2ac", "M" = "#d9cfe6ac"),
"E" = c("F" = "#d4d41bac", "M" = "#ecec69ac"))
StimSexOutlineColors <- list("S1" = c("F" = "#bd0264ff", "M" = "#fd2898ff"),
"S2" = c("F" = "#fca654ff", "M" = "#fedab8ff"),
"N" = c("F" = "#a38dc2ff", "M" = "#d9cfe6ff"),
"E" = c("F" = "#d4d41bff", "M" = "#ecec69ff"))
fs_vs_x_plot_categorical <- function(current_parent, current_stim, xaxis="Sex",
current_fill_colors=StimSexColors[[current_stim]],
current_outline_colors = StimSexOutlineColors[[current_stim]],
xcategorylabs = NULL) {
# current_parent <- "4"; current_stim <- "S1"; xaxis <- "Sex"; current_colors <- StimSexColors[[current_stim]]
# CohortColors <- c("Hospitalized" = "#386cb0", "Non-hospitalized" = "#7fc97f", "Healthy" = "#ebeef0")
# CohortLabs <- c("Non-hospitalized" = "NH", "Hospitalized" = "H")
# current_parent <- "8"; current_stim <- "E"; xaxis <- "Cohort"; current_colors <- CohortColors; xcategorylabs <- CohortLabs
current_data <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == !!current_parent & STIM == !!current_stim)
current_plot <- ggplot(current_data, aes(!!as.symbol(xaxis), FS)) +
theme_bw(base_size = 22) +
geom_violin(aes(fill = !!as.symbol(xaxis)), draw_quantiles = c(0.5),
# All violins have the same maximum width. Overrides default of equal area
scale="width", width = 0.6) +
geom_violin(fill="transparent", draw_quantiles = c(0.25, 0.75), linetype = "dashed",
scale="width", width = 0.6) +
geom_violin(aes(color=!!as.symbol(xaxis)), fill="transparent",
scale="width", width=0.6, size=1.1) +
geom_quasirandom(size=0.1, width=0.2, varwidth=T, method="quasirandom") +
theme(axis.title.x = element_blank(),
axis.title.y = element_text(size=20),
axis.text.y = element_text(color="black", size=17),
axis.text.x = element_text(color="black", size=20),
plot.title = element_text(hjust = 0.5, size=21),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(0.3, 0.2, 0.1, 0.2, "cm")) +
labs(title = current_stim) + #,
# x = sub("([A-Za-z]*)_.*", "\\1", xaxis)) +
scale_fill_manual(values = current_fill_colors) +
scale_color_manual(values = current_outline_colors)
wilcox_result <- wilcox.test(as.formula(sprintf("FS ~ %s", xaxis)), data = current_data)
p.text <- if(wilcox_result$p.value < 0.001) {
"p<.001"
} else {
paste0("p=", sub("0.", ".", round(wilcox_result$p.value, 3)))
}
y_range <- ggplot_build(current_plot)$layout$panel_params[[1]]$y.range
current_plot <- current_plot +
coord_cartesian(ylim = c(NA, y_range[[2]] + 0.07*diff(y_range))) +
annotate("text", x = 1.5, y = y_range[[2]] + 0.01*diff(y_range), label = p.text, size=5.5)
if(!is.null(xcategorylabs)) {
current_plot <- current_plot +
scale_x_discrete(labels=xcategorylabs)
}
current_plot
}
parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_sex_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot_categorical)
## Warning in wilcox.test.default(x = c(0.00823543307086614, 0.00798326771653543, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000123818897637795,
## 4.3503937007874e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00817322834645669, 0.0078740157480315, :
## cannot compute exact p-value with ties
names(fs_vs_sex_plot_list) <- paste0(stim_vec, "_", parents_vec)
fs_vs_sex_plot_list
## $S1_4
##
## $S2_4
##
## $N_4
##
## $E_4
##
## $S1_8
##
## $S2_8
##
## $N_8
##
## $E_8
In many of the boxplots above, Males are shifted slightly higher on the FS/PFS axis compared to Females, but none of the comparisons are significant.
Look at the heatmaps again. Also focus on IFNg
Stack the plots from the different stims, this time with IFNg subsets on the RHS
A commonality to the subsets which are enriched in Hospitalized individuals is CD154.
cytokine_annotation_colors <- rep("black", 30)
cytokine_order_for_annotation = c("CD154", "IL2", "TNFa", "CD107a", "IL4/5/13", "IL17a", "IFNg")
# Adapt code from COMPASS::plotCompassResultStack to prepare the data for plotting
# First we use mergeMatricesForPlotCOMPASSResultStack to merge the data from the different runs together.
# Then use the merged data to create a fake merged COMPASSResult object with the merged categories, metadata, and mean_gamma data frames
# This should allow us to use some of the customized options in plot.COMPASSResult.ComplexHeatmap (e.g. to get the IFNg subsets on the RHS)
mergeMatricesForPlotCOMPASSResultStack_tmp <- utils::getFromNamespace("mergeMatricesForPlotCOMPASSResultStack", "COMPASS")
CohortColors_forHeatmap <- c("Conv Hosp" = "#707ed4", "Conv Non-Hosp" = "#c1ddd7", "Assay Controls" = "#ebeef0")
StimColors <- c("S1" = "#f0027f", "S2" = "#fdc086", "NCAP" = "#beaed4", "VEMP" = "#e6e63c") # https://colorbrewer2.org/?type=qualitative&scheme=Accent&n=6
row_ann_colors_v2 <- list(`Stim` = StimColors, `Cohort` = CohortColors_forHeatmap)
# CD4 runs
cd4_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD4RunNames],
threshold=0.01,
minimum_dof=1,
maximum_dof=Inf,
row_annotation = c("Stim", "Cohort"),
variable = "Stim")
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
if(save_output) {
saveRDS(cd4_data2plot, here::here("processed_data/20200815_Merged_CD4_COMPASS_Data.rds"))
}
cd4_merged_COMPASSResult <- crList2$`4_Spike_1`
cd4_merged_COMPASSResult$fit$mean_gamma <- cd4_data2plot$MMerged
cd4_merged_COMPASSResult$fit$categories <- cd4_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd4_merged_COMPASSResult$fit$categories) <- rownames(cd4_data2plot$catsMerged)
cd4_merged_COMPASSResult$data$meta <- cd4_data2plot$rowannMerged
cd4_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd4_merged_COMPASSResult$data$meta)
cd4_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^4_", "", cd4_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd4_merged_COMPASSResult$data$meta$Cohort <- factor(recode(cd4_merged_COMPASSResult$data$meta$Cohort,
"Hospitalized" = "Conv Hosp",
"Non-hospitalized" = "Conv Non-Hosp",
"Healthy" = "Assay Controls"),
levels = c("Conv Hosp", "Conv Non-Hosp", "Assay Controls"))
cd4_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd4_heatmap_by_ifng <- plot.COMPASSResult.ComplexHeatmap(cr = cd4_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE)
draw(cd4_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
cd4_heatmap_by_ifng_gap <- plot.COMPASSResult.ComplexHeatmap(cr = cd4_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE,
row_gap = unit(0.05, "in"))
# draw(cd4_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
# CD8 runs
cd8_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD8RunNames],
threshold=0.01,
minimum_dof=1,
maximum_dof=Inf,
row_annotation = c("Stim", "Cohort"),
variable = "Stim")
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
if(save_output) {
saveRDS(cd8_data2plot, here::here("processed_data/20200815_Merged_CD8_COMPASS_Data.rds"))
}
cd8_merged_COMPASSResult <- crList2$`8_Spike_1`
cd8_merged_COMPASSResult$fit$mean_gamma <- cd8_data2plot$MMerged
cd8_merged_COMPASSResult$fit$categories <- cd8_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd8_merged_COMPASSResult$fit$categories) <- rownames(cd8_data2plot$catsMerged)
cd8_merged_COMPASSResult$data$meta <- cd8_data2plot$rowannMerged
cd8_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd8_merged_COMPASSResult$data$meta)
cd8_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^8_", "", cd8_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd8_merged_COMPASSResult$data$meta$Cohort <- factor(recode(cd8_merged_COMPASSResult$data$meta$Cohort,
"Hospitalized" = "Conv Hosp",
"Non-hospitalized" = "Conv Non-Hosp",
"Healthy" = "Assay Controls"),
levels = c("Conv Hosp", "Conv Non-Hosp", "Assay Controls"))
cd8_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd8_heatmap_by_ifng <- plot.COMPASSResult.ComplexHeatmap(cr = cd8_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE)
draw(cd8_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
cd8_heatmap_by_ifng_gap <- plot.COMPASSResult.ComplexHeatmap(cr = cd8_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE,
row_gap = unit(0.05, "in"))
# draw(cd8_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/20200902_CD4_COMPASS_Heatmap.pdf"), width=9, height=8,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
draw(cd4_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
dev.off()
pdf(file=here::here("out/PostCompassPlots/20200902_CD8_COMPASS_Heatmap.pdf"), width=5, height=8,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica
draw(cd8_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
dev.off()
# With row gaps
pdf(file=here::here("out/PostCompassPlots/20200902_CD4_COMPASS_Heatmap_gap.pdf"), width=9, height=9,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
draw(cd4_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
dev.off()
pdf(file=here::here("out/PostCompassPlots/20200902_CD8_COMPASS_Heatmap_gap.pdf"), width=5, height=9,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica
draw(cd8_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
dev.off()
# Later run in linux terminal:
# convert -density 300 20200902_CD4_COMPASS_Heatmap_Edit.pdf -quality 90 20200902_CD4_COMPASS_Heatmap_Edit.png
}
CohortColors <- c("Hospitalized" = "#757bbcb2", "Non-hospitalized" = "#b0d2c8bf")
CohortOutlineColors <- c("Hospitalized" = "#757bbbff", "Non-hospitalized" = "#b0d1c8ff")
CohortLabs <- c("Hospitalized" = "H", "Non-hospitalized" = "NH")
parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_cohort_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot_categorical,
xaxis = "Cohort", current_fill_colors = CohortColors,
current_outline_colors = CohortOutlineColors, xcategorylabs = CohortLabs)
## Warning in wilcox.test.default(x = c(0.00823543307086614, 0.00870807086614173, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000123818897637795,
## 0.00426889763779528, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00817322834645669, 0.00848228346456693, :
## cannot compute exact p-value with ties
names(fs_vs_cohort_plot_list) <- paste0(stim_vec, "_", parents_vec)
fs_vs_cohort_plot_list
## $S1_4
##
## $S2_4
##
## $N_4
##
## $E_4
##
## $S1_8
##
## $S2_8
##
## $N_8
##
## $E_8
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values
# FS vs demographics plots to disk
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4CDEF_CD4_FS_vs_Demographics.pdf"), width=14, height=14,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
(fs_vs_age_plot_list$S1_4 | fs_vs_age_plot_list$S2_4 | fs_vs_age_plot_list$N_4 | fs_vs_age_plot_list$E_4) /
(fs_vs_sex_plot_list$S1_4 | fs_vs_sex_plot_list$S2_4 | fs_vs_sex_plot_list$N_4 | fs_vs_sex_plot_list$E_4) /
(fs_vs_days_plot_list$S1_4 | fs_vs_days_plot_list$S2_4 | fs_vs_days_plot_list$N_4 | fs_vs_days_plot_list$E_4) /
(fs_vs_cohort_plot_list$S1_4 | fs_vs_cohort_plot_list$S2_4 | fs_vs_cohort_plot_list$N_4 | fs_vs_cohort_plot_list$E_4)
dev.off()
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5HIJK_CD8_FS_vs_Demographics.pdf"), width=14, height=14,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
(fs_vs_age_plot_list$S1_8 | fs_vs_age_plot_list$S2_8 | fs_vs_age_plot_list$N_8 | fs_vs_age_plot_list$E_8) /
(fs_vs_sex_plot_list$S1_8 | fs_vs_sex_plot_list$S2_8 | fs_vs_sex_plot_list$N_8 | fs_vs_sex_plot_list$E_8) /
(fs_vs_days_plot_list$S1_8 | fs_vs_days_plot_list$S2_8 | fs_vs_days_plot_list$N_8 | fs_vs_days_plot_list$E_8) /
(fs_vs_cohort_plot_list$S1_8 | fs_vs_cohort_plot_list$S2_8 | fs_vs_cohort_plot_list$N_8 | fs_vs_cohort_plot_list$E_8)
dev.off()
}
source(here::here("scripts/20200604_Helper_Functions.R"))
# Arial is used by dotplot function below
# Note p-values are bonferroni adjusted in dotplots
# Also note that some points are not shown in order to zoom in better to the bulk of the point
bg_corr_dotplots <- pmap(.l = list(c(CD4RunNames, CD8RunNames),
rep(c("CD4+", "CD8"), each = 4),
list(NULL, c(0, 0.0085), NULL, NULL,
NULL, NULL, c(0, 0.005), NULL),
c(5, 5, 5, 5,
5, 5, 5, 5)),
.f = function(n, p, y, p_text_size) {
# "staircase effect" for categories df heatmap gets done automatically, unlike in heatmap
# Returned dotplot is cowplot
make_dotplot_for_COMPASS_run(crList.no_healthy[[n]], n, parentSubset = p,
return_output = T, current_ylim = y, include_0_line=T,
cytokine_order_for_annotation=cytokine_order_for_annotation,
p_text_size=p_text_size, group_by_colors=CohortColors,
zeroed_BgCorr_stats = FALSE, zeroed_BgCorr_plot = TRUE,
point_size = 1.2, dichotomize_by_cytokine = "IFNg")
})
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
names(bg_corr_dotplots) <- c(CD4RunNames, CD8RunNames)
if(save_output) {
# Save the output of make_dotplot_for_COMPASS_run to disk so we can use the extracted data for follow-up analysis
saveRDS(bg_corr_dotplots, here::here("processed_data/20200824_make_dotplot_for_COMPASS_run_list.rds"))
}
for(n in names(bg_corr_dotplots)) {
print(plot_grid(ggplot() +
theme_void() +
geom_text(aes(0,0,label=sprintf("CD%s %s", sub("([48]).*", "\\1", n), sub("_", " ", sub("[48]_(.*)", "\\1", n)))), size=10) +
xlab(NULL),
bg_corr_dotplots[[n]]$Dotplot,
ncol=1, rel_heights = c(0.1, 1)))
}
# How many points are out of bounds of the plot for plots where boundaries were specified?
bg_corr_dotplots$`4_Spike_2`$BgCorrMagnitudes %>% dplyr::filter_if(is.numeric, any_vars(. > 0.0085)) %>%
select(-Individual, -Cohort) %>%
select_if(~max(.) > 0.0085)
## # A tibble: 1 x 1
## `!IL2&IL4/5/13&!IFNg&!TNFa&!IL17a&!CD154&!CD107a`
## <dbl>
## 1 0.0158
bg_corr_dotplots$`8_NCAP`$BgCorrMagnitudes %>% dplyr::filter_if(is.numeric, any_vars(. > 0.005)) %>%
select(-Individual, -Cohort) %>%
select_if(~max(.) > 0.005)
## # A tibble: 1 x 3
## `!IL2&!IL4/5/13&IFNg&!TNF… `!IL2&!IL4/5/13&IFNg&TNF… `!IL2&!IL4/5/13&IFNg&TNF…
## <dbl> <dbl> <dbl>
## 1 0.0105 0.0129 0.00746
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5A_CD4_S1_COMPASS_Magnitudes_vs_Cohort.pdf"), width=12, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_Spike_1"]]$Dotplot
dev.off()
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5B_CD4_S2_COMPASS_Magnitudes_vs_Cohort.pdf"), width=11.5, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_Spike_2"]]$Dotplot
dev.off()
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5C_CD4_N_COMPASS_Magnitudes_vs_Cohort.pdf"), width=14, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_NCAP"]]$Dotplot
dev.off()
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5D_CD4_E_COMPASS_Magnitudes_vs_Cohort.pdf"), width=3, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_VEMP"]]$Dotplot
dev.off()
}
sig_tests_tally <- lapply(names(bg_corr_dotplots), function(n) {length(which(bg_corr_dotplots[[n]]$Test_Results$p.adj < 0.05))})
names(sig_tests_tally) <- names(bg_corr_dotplots)
sig_tests_tally # make sure all significant comparisons were plotted and accounted for
## $`4_Spike_1`
## [1] 9
##
## $`4_Spike_2`
## [1] 4
##
## $`4_NCAP`
## [1] 5
##
## $`4_VEMP`
## [1] 0
##
## $`8_Spike_1`
## [1] 0
##
## $`8_Spike_2`
## [1] 0
##
## $`8_NCAP`
## [1] 0
##
## $`8_VEMP`
## [1] 0
Save to disk: background-corrected magnitudes from subsets which were significantly different across hospitalization status. For integrated analysis with other T cell and Ab data.
if(save_output) {
signif_bgcorr_dat_list <- lapply(c(CD4RunNames, CD8RunNames), function(runName) {
signif_subset_names <- bg_corr_dotplots[[runName]]$Test_Results %>% dplyr::filter(p.adj < 0.05) %>% dplyr::pull(BooleanSubset)
bg_corr_dotplots[[runName]]$BgCorrMagnitudes %>%
rename("SAMPLE_ID" = "Individual") %>%
dplyr::select(c("SAMPLE_ID", signif_subset_names)) %>%
rename_at(vars(signif_subset_names), ~ paste("CD", sub("Spike_", "S", runName), " ", ., sep = ""))
})
names(signif_bgcorr_dat_list) <- c(CD4RunNames, CD8RunNames)
bgcorr_dat_2save <- purrr::reduce(signif_bgcorr_dat_list, full_join, by = "SAMPLE_ID") %>%
# Convert proportions into percents
# Convert the numeric columns to character type after rounding to 20 digits. This will help ensure consistency when writing and reading the data to a file
mutate_at(vars(-"SAMPLE_ID"), ~ format(. * 100, digits = 20))
write.csv(bgcorr_dat_2save, here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), row.names = F)
# bgcorr_dat_2save <- read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F)
# all.equal(bgcorr_dat_2save %>% mutate_at(vars(-"SAMPLE_ID"), as.numeric),
# read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F))
}